#==================================================================#
#
# Voters Punish Politicians with Depression
# Peter John Loewen and Ludovic Rheault
# British Journal of Political Science
# (Online Appendix)
#
# R script for results in the online appendix
#
#===================================================================#

# Loading required packages
library(ggstance)
library(ggplot2)
library(zeligverse)
library(xtable)
library(stargazer)
library(car)

usafile = "depression_usa.csv"
df_usa = read.csv(usafile)
canfile = "depression_can.csv"
df_can = read.csv(canfile)

# Reordering levels for presentation
df_usa$illnessC = factor(df_usa$illnessC,
                         levels = c('Flu', 'Skin Cancer', 'Depression'))
df_can$illnessC = factor(df_can$illnessC,
                         levels = c('Flu', 'Skin Cancer', 'Depression'))

# Prior experience with depression
df_usa$dep = ifelse(df_usa$depression_exp=="Yes", 1, 0)
df_can$dep = ifelse(df_can$depression_exp=="Yes", 1, 0)

# Convenience function for tables
pTable = function(v1, v2 = NULL, digits = NULL){
  # Nicely formatted proportion table with rounded values.
  if (missing(digits)){
    digits = 3
  }
  if (missing(v2)){
    return(round(prop.table(table(v1)), digits))
  }
  else {
    return(round(prop.table(table(v2, v1), m = 1), digits))
  }
}


#===================================================================#
# Tables A1-A2. Balance Checks
#===================================================================#

usa1 = glm(ab_depression_numeric ~
            age +
            gender +
            education +
            dep +
            pid +
            candidate_A_age +
            candidate_A_gender +
            candidate_A_race +
            candidate_A_party +
            candidate_B_age +
            candidate_B_gender +
            candidate_B_race, data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_depression_numeric ~ age +
            gender +
            education +
            dep +
            pid +
            candidate_C_age +
            candidate_C_gender +
            candidate_C_race +
            candidate_C_party +
            candidate_D_age +
            candidate_D_gender +
            candidate_D_race,
           data=df_usa, family=binomial(link="logit"))
varnames_usa = c("Age",
            "Gender",
            "Education",
            "Experience with Depression",
            "Republican Identifier",
            "Candidate A Age",
            "Candidate A (Gender = Male)",
            "Candidate A (Race = Hispanic)",
            "Candidate A (Race = African-American) ",
            "Candidate A (Party = Republican)",
            "Candidate B Age",
            "Candidate B (Gender = Male)",
            "Candidate B (Race = Hispanic)",
            "Candidate B (Race = African-American)",
            "Candidate C Age",
            "Candidate C (Gender = Male)",
            "Candidate C (Race = Hispanic)",
            "Candidate C (Race = African-American) ",
            "Candidate C (Party = Republican)",
            "Candidate D Age",
            "Candidate D (Gender = Male)",
            "Candidate D (Race = Hispanic)",
            "Candidate D (Race = African-American)")

stargazer(usa1, usa2, star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=varnames_usa, header=FALSE)

can1 = glm(ab_depression_numeric ~
            age +
            gender +
            education +
            dep +
            candidate_A_age +
            candidate_A_gender +
            candidate_A_race +
            candidate_A_party +
            candidate_B_age +
            candidate_B_gender +
            candidate_B_race, data=df_can, family=binomial(link="logit"))
can2 = glm(cd_depression_numeric ~
            age +
            gender +
            education +
            dep +
            candidate_C_age +
            candidate_C_gender +
            candidate_C_race +
            candidate_C_party +
            candidate_D_age +
            candidate_D_gender +
            candidate_D_race,
            data=df_can, family=binomial(link="logit"))
varnames_can = c("Age",
               "Gender",
               "Education",
               "Experience with Depression",
               "Ill Candidate Age",
               "Ill Candidate (Gender = Male)",
               "Ill Candidate (Race = African-Canadian)",
               "Ill Candidate (Race = Asian Canadian) ",
               "Ill Candidate (Party = Liberal)",
               "Healthy Candidate Age",
               "Healthy Candidate (Gender = Male)",
               "Healthy Candidate (Race = African-Canadian)",
               "Healthy Candidate (Race = Asian Canadian)",
               "Ill Candidate Age",
               "Ill Candidate (Gender = Male)",
               "Ill Candidate (Race = African-Canadian)",
               "Ill Candidate (Race = Asian Canadian) ",
               "Ill Candidate (Party = Liberal)",
               "Healthy Candidate Age",
               "Healthy Candidate (Gender = Male)",
               "Healthy Candidate (Race = African-Canadian)",
               "Healthy Candidate (Race = Asian Canadian)")

stargazer(can1, can2, star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=varnames_can, header=FALSE)

#===================================================================#
# Figure A1. Conditional effects, by partisan affiliation
# (Incumbent Session, USA)
#===================================================================#

df_usa$repcan = ifelse(df_usa$candidate_C_party=="Republican",1,0)
df_usa$pidf = as.factor(df_usa$pid)
levels(df_usa$pidf)=c("Democrat","Independent","Republican")

# Reset seed for pseudo-random numbers
set.seed(42)

# Republican Candidate
# For party ID==Republican:
z = zelig(cd_vote_numeric ~ cd_depression_numeric,
          model = "logit", 
          data = df_usa[df_usa$pidf=="Republican" & df_usa$repcan==1,],
          cite=FALSE)
x0 = setx(z, cd_depression_numeric = 0)
x1 = setx(z, cd_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)

# Accumulating values for plot:
s = data.frame(ate = mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               lb = quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               ub = quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975))

# For party ID==Independent:
z = zelig(cd_vote_numeric ~ cd_depression_numeric,
          model = "logit", 
          data = df_usa[df_usa$pidf=="Independent" & df_usa$repcan==1,],
          cite=FALSE)
x0 = setx(z, cd_depression_numeric = 0)
x1 = setx(z, cd_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

# For party ID==Democrat:
z = zelig(cd_vote_numeric ~ cd_depression_numeric,
          model = "logit", 
          data = df_usa[df_usa$pidf=="Democrat" & df_usa$repcan==1,],
          cite=FALSE)
x0 = setx(z, cd_depression_numeric = 0)
x1 = setx(z, cd_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

# Democratic Candidate
# For party ID==Republican:
z = zelig(cd_vote_numeric ~ cd_depression_numeric,
          model = "logit", 
          data = df_usa[df_usa$pidf=="Republican" & df_usa$repcan==0,],
          cite=FALSE)
x0 = setx(z, cd_depression_numeric = 0)
x1 = setx(z, cd_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

# For party ID==Independent:
z = zelig(cd_vote_numeric ~ cd_depression_numeric,
          model = "logit", 
          data = df_usa[df_usa$pidf=="Independent" & df_usa$repcan==0,],
          cite=FALSE)
x0 = setx(z, cd_depression_numeric = 0)
x1 = setx(z, cd_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

# For party ID==Democrat:
z = zelig(cd_vote_numeric ~ cd_depression_numeric,
          model = "logit", 
          data = df_usa[df_usa$pidf=="Democrat" & df_usa$repcan==0,],
          cite=FALSE)
x0 = setx(z, cd_depression_numeric = 0)
x1 = setx(z, cd_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

cnames = c("Republican Voter",
           "Independent Voter",
           "Democrat Voter")
s1 = s[1:3,]
s2 = s[4:6,]
s1$comparisons = factor(cnames, levels=cnames)
s2$comparisons = factor(cnames, levels=cnames)

ggplot(s1, aes(x = factor(s1$comparisons, levels=rev(cnames)), y = s1$ate,
                ymin = s1$lb, ymax = s1$ub)) +
  geom_pointrange(aes(colour=factor(s1$comparisons, levels=rev(cnames))), size=2, fill="white",shape=22) + theme_bw() + coord_flip() +
  expand_limits(y = c(-0.3, 0.1)) +
  scale_y_continuous(breaks = c(-0.3,-0.2,-0.1,0,0.1)) +
  scale_colour_manual(values=c(185,185,185)) +
  theme(text = element_text(size=14),
        axis.text.y = element_text(size=14),
        legend.position="none") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  ylab("Treatment Effect (95% Confidence Interval)") + xlab("")

ggplot(s2, aes(x = factor(s2$comparisons, levels=rev(cnames)), y = s2$ate,
                ymin = s2$lb, ymax = s2$ub)) +
  geom_pointrange(aes(colour=factor(s2$comparisons, levels=rev(cnames))), size=2, fill="white",shape=22) + theme_bw() + coord_flip() +
  expand_limits(y = c(-0.3, 0.1)) +
  scale_y_continuous(breaks = c(-0.3,-0.2,-0.1,0,0.1)) +
  scale_colour_manual(values=c(185,185,185)) +
  theme(text = element_text(size=14),
        axis.text.y = element_text(size=14),
        legend.position="none") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  ylab("Treatment Effect (95% Confidence Interval)") + xlab("")

#===================================================================#
# Tables A3-A4. Vote Choice by Treatment Group (Logistic Regression)
#===================================================================#
# With categorical treatment variables.
# USA
usa1 = glm(ab_vote_numeric ~ illnessA, data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_vote_numeric ~ illnessC, data=df_usa, family=binomial(link="logit"))
# Canada
can1 = glm(ab_vote_numeric ~ illnessA, data=df_can, family=binomial(link="logit"))
can2 = glm(cd_vote_numeric ~ illnessC, data=df_can, family=binomial(link="logit"))

stargazer(usa1, usa2, can1, can2, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

# Using cancer as the base category.
# USA
usa1 = glm(ab_vote_numeric ~ relevel(illnessA, ref="Cancer"), data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_vote_numeric ~ relevel(illnessC, ref="Skin Cancer"), data=df_usa, family=binomial(link="logit"))
# Canada
can1 = glm(ab_vote_numeric ~ relevel(illnessA, ref="Cancer"), data=df_can, family=binomial(link="logit"))
can2 = glm(cd_vote_numeric ~ relevel(illnessC, ref="Skin Cancer"), data=df_can, family=binomial(link="logit"))

stargazer(usa1, usa2, can1, can2, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

# With binary treatment variable.
# USA
usa1 = glm(ab_vote_numeric ~ ab_depression_numeric, data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_vote_numeric ~ cd_depression_numeric, data=df_usa, family=binomial(link="logit"))
# Canada
can1 = glm(ab_vote_numeric ~ ab_depression_numeric, data=df_can, family=binomial(link="logit"))
can2 = glm(cd_vote_numeric ~ cd_depression_numeric, data=df_can, family=binomial(link="logit"))

stargazer(usa1, usa2, can1, can2, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

#============================================================================#
# Tables A5-A6. Vote Choice and Depression Treatment, with Covariates (USA)
#============================================================================#

# Open Seat session (Table A5)
usa1 = glm(ab_vote_numeric ~ ab_depression_numeric + age + gender + education + dep + pid, data=df_usa, family=binomial(link="logit"))
usa2 = glm(ab_vote_numeric ~ ab_depression_numeric +
            candidate_A_age +
            candidate_A_gender +
            candidate_A_race +
            candidate_A_party +
            candidate_B_age +
            candidate_B_gender +
            candidate_B_race, data=df_usa, family=binomial(link="logit"))
usa3 = glm(ab_vote_numeric ~ ab_depression_numeric +
            candidate_A_age +
            candidate_A_gender +
            candidate_A_race +
            candidate_A_party +
            candidate_B_age +
            candidate_B_gender +
            candidate_B_race +
            age + gender + education + dep + pid
          , data=df_usa, family=binomial(link="logit"))
rownamesab = c("Depression Treatment",
               "Age",
               "Gender",
               "College Degree",
               "Experience with Depression",
               "Republican Identifier",
               "Candidate A Age",
               "Candidate A (Gender = Male)",
               "Candidate A (Race = Hispanic)",
               "Candidate A (Race = African-American) ",
               "Candidate A (Party = Republican)",
               "Candidate B Age",
               "Candidate B (Gender = Male)",
               "Candidate B (Race = Hispanic)",
               "Candidate B (Race = African-American)")
stargazer(usa1, usa2, usa3, star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=rownamesab, header=FALSE)

# Incumbent session (Table A6)
usa1 = glm(cd_vote_numeric ~
             cd_depression_numeric + age + gender + education + dep + pid,
           data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_vote_numeric ~ cd_depression_numeric +
             candidate_C_age +
             candidate_C_gender +
             candidate_C_race +
             candidate_C_party +
             candidate_D_age +
             candidate_D_gender +
             candidate_D_race, data=df_usa, family=binomial(link="logit"))
usa3 = glm(cd_vote_numeric ~ cd_depression_numeric +
             candidate_C_age +
             candidate_C_gender +
             candidate_C_race +
             candidate_C_party +
             candidate_D_age +
             candidate_D_gender +
             candidate_D_race +
             age + gender + education + dep + pid,
           data=df_usa, family=binomial(link="logit"))
rownamescd = c("Depression Treatment",
               "Age",
               "Gender",
               "Education",
               "Experience with Depression",
               "Republican Identifier",
               "Candidate C Age",
               "Candidate C (Gender = Male)",
               "Candidate C (Race = Hispanic)",
               "Candidate C (Race = African-American) ",
               "Candidate C (Party = Republican)",
               "Candidate D Age",
               "Candidate D (Gender = Male)",
               "Candidate D (Race = Hispanic)",
               "Candidate D (Race = African-American)")
stargazer(usa1, usa2, usa3, star.cutoffs = c(0.05,0.01,0.001),  
          covariate.labels=rownamescd, header=FALSE)

#===================================================================#
# Figure A2. Conditional Effect of Depression (USA)
#===================================================================#
# Reset seed
set.seed(42)

# by education (college degree or not):
z = zelig(ab_vote_numeric ~ ab_depression_numeric + age + dep + gender,
          model = "logit", data = df_usa[df_usa$education>=5,],
          cite=FALSE)
x0 = setx(z, ab_depression_numeric = 0)
x1 = setx(z, ab_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
## Accumulating values for plot:
s = data.frame(ate = mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               lb = quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               ub = quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975))

z = zelig(ab_vote_numeric ~ ab_depression_numeric + age + dep + gender,
          model = "logit", data = df_usa[df_usa$education<5,],
          cite=FALSE)
x0 = setx(z, ab_depression_numeric = 0)
x1 = setx(z, ab_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

# by experience with depression:
z = zelig(ab_vote_numeric ~ ab_depression_numeric + age + gender + education,
          model = "logit", data = df_usa[df_usa$dep==1,],
          cite=FALSE)
x0 = setx(z, ab_depression_numeric = 0)
x1 = setx(z, ab_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

z = zelig(ab_vote_numeric ~ ab_depression_numeric + age + gender + education,
          model = "logit", data = df_usa[df_usa$dep==0,],
          cite=FALSE)
x0 = setx(z, ab_depression_numeric = 0)
x1 = setx(z, ab_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

# by age group:
z = zelig(ab_vote_numeric ~ ab_depression_numeric + gender + dep + education,
          model = "logit", data = df_usa[df_usa$age>=40,],
          cite=FALSE)
x0 = setx(z, ab_depression_numeric = 0)
x1 = setx(z, ab_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

z = zelig(ab_vote_numeric ~ ab_depression_numeric + gender + dep + education,
          model = "logit", data = df_usa[df_usa$age<40,],
          cite=FALSE)
x0 = setx(z, ab_depression_numeric = 0)
x1 = setx(z, ab_depression_numeric = 1)
sout = sim(z, x = x0, x1 = x1)
s = rbind(s, c(mean(unlist(sout[["sim.out"]][["x1"]][["fd"]])),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.025),
               quantile(unlist(sout[["sim.out"]][["x1"]][["fd"]]), 0.975)))

cnames = c("College Degree",
           "No College Degree",
           "Experience with Depression",
           "No Prior Experience",
           "Aged 40 and Above",
           "Less than 40")
s$comparisons = factor(cnames, levels=cnames)

ggplot(s, aes(x = factor(s$comparisons, levels=rev(cnames)), y = s$ate,
       ymin = s$lb, ymax = s$ub)) +
  geom_pointrange(size=1, color="black", fill="white",shape=22) + theme_bw() + coord_flip() +
  expand_limits(y = c(-0.3, 0.1)) +
  scale_y_continuous(breaks = c(-0.3,-0.2,-0.1,0,0.1)) +
  theme(text = element_text(size=12)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  ylab("Conditional Average Treatment Effect (95% Confidence Interval)") + xlab("")

#======================================================================#
# Figure A3. Vote Percentages Across Experimental Groups (Canada)
#======================================================================#

dtable1 = pTable(df_can$ab_vote, df_can$illnessA, digits = 3)
dtable1 = data.frame(dtable1)[1:3,]
dtable2 = pTable(df_can$cd_vote, df_can$illnessC, digits = 3)
dtable2 = data.frame(dtable2)[1:3,]

# Figure A3a
ggplot(data = dtable1, aes(x = v2, y = Freq)) + theme_bw() +
  geom_hline(yintercept=0.5,lty=2) +
  geom_bar(stat="identity", color=185, fill=185, lwd=1.25,
           width=0.65,
           alpha=0.1,position=position_dodge()) +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.x = element_text(size=14),
        axis.title.x = element_text(size=12, margin=margin(20,0,0,0)),
        axis.title.y = element_text(size=12),
        legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.spacing = unit(250, "line")) +
  ylab("Vote Proportions") +
  xlab("Medical Condition (Experimental Group)") +
  labs(fill = "") +
  expand_limits(y = c(0, .7)) +
  geom_text(aes(label = paste(round(Freq*100,1),"%",sep="")),
            position=position_dodge(width=0.75), size=5, vjust=-2.5)


# Figure A3b
ggplot(data = dtable2, aes(x = v2, y = Freq)) + theme_bw() +
  geom_hline(yintercept=0.5,lty=2) +
  geom_bar(stat="identity", color=185, fill=185, lwd=1.25,
           width=0.65,
           alpha=0.1,position=position_dodge()) +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.x = element_text(size=14),
        axis.title.x = element_text(size=12, margin=margin(20,0,0,0)),
        axis.title.y = element_text(size=12),
        legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.spacing = unit(250, "line")) +
  ylab("Vote Proportions") +
  xlab("Medical Condition (Experimental Group)") +
  labs(fill = "") +
  expand_limits(y = c(0, .7)) +
  geom_text(aes(label = paste(round(Freq*100,1),"%",sep="")),
            position=position_dodge(width=0.75), size=5, vjust=-2.5)

#===================================================================#
# Tables A7-A8. Logistic Regressions with Covariates (Canada)
#===================================================================#

# Open Seat session (Table 6)
can1 = glm(ab_vote_numeric ~ ab_depression_numeric + age + gender + education + dep, data=df_can, family=binomial(link="logit"))
can2 = glm(ab_vote_numeric ~ ab_depression_numeric +
            candidate_A_age +
            candidate_A_gender +
            candidate_A_race +
            candidate_A_party +
            candidate_B_age +
            candidate_B_gender +
            candidate_B_race, data=df_can, family=binomial(link="logit"))
can3 = glm(ab_vote_numeric ~ ab_depression_numeric +
            candidate_A_age +
            candidate_A_gender +
            candidate_A_race +
            candidate_A_party +
            candidate_B_age +
            candidate_B_gender +
            candidate_B_race +
            age + gender + education + dep
          , data=df_can, family=binomial(link="logit"))
rownamesab = c("Depression Treatment",
               "Age",
               "Gender",
               "Education",
               "Experience with Depression",
               "Candidate A Age",
               "Candidate A (Gender = Male)",
               "Candidate A (Race = African-Canadian)",
               "Candidate A (Race = Asian Canadian) ",
               "Candidate A (Party = Liberal)",
               "Candidate B Age",
               "Candidate B (Gender = Male)",
               "Candidate B (Race = African-Canadian)",
               "Candidate B (Race = Asian Canadian)")
stargazer(can1, can2, can3, star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=rownamesab, header=FALSE)

# Incumbent session (Table 7)
can1 = glm(cd_vote_numeric ~
             cd_depression_numeric + age + gender + education + dep,
           data=df_can, family=binomial(link="logit"))
can2 = glm(cd_vote_numeric ~ cd_depression_numeric +
             candidate_C_age +
             candidate_C_gender +
             candidate_C_race +
             candidate_C_party +
             candidate_D_age +
             candidate_D_gender +
             candidate_D_race, data=df_can, family=binomial(link="logit"))
can3 = glm(cd_vote_numeric ~ cd_depression_numeric +
             candidate_C_age +
             candidate_C_gender +
             candidate_C_race +
             candidate_C_party +
             candidate_D_age +
             candidate_D_gender +
             candidate_D_race +
             age + gender + education + dep,
           data=df_can, family=binomial(link="logit"))
rownamescd = c("Depression Treatment",
               "Age",
               "Gender",
               "Education",
               "Experience with Depression",
               "Candidate C Age",
               "Candidate C (Gender = Male)",
               "Candidate C (Race = African-Canadian)",
               "Candidate C (Race = Asian Canadian) ",
               "Candidate C (Party = Liberal)",
               "Candidate D Age",
               "Candidate D (Gender = Male)",
               "Candidate D (Race = African-Canadian)",
               "Candidate D (Race = Asian Canadian) ")
stargazer(can1, can2, can3, star.cutoffs = c(0.05,0.01,0.001),  
          covariate.labels=rownamescd, header=FALSE)

#===================================================================#
# Tables A9-A12. Character Evaluations as Dependent Variables
#===================================================================#

# Open Seat, USA
usa1 = glm(ab_prepared_n ~ ab_depression_numeric, data=df_usa, family=binomial(link="logit"))
usa2 = glm(ab_trust_n ~ ab_depression_numeric, data=df_usa, family=binomial(link="logit"))
usa3 = glm(ab_character_n ~ ab_depression_numeric, data=df_usa, family=binomial(link="logit"))
stargazer(usa1, usa2, usa3, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

# Incumbent, USA
usa1 = glm(cd_prepared_n ~ cd_depression_numeric, data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_trust_n ~ cd_depression_numeric, data=df_usa, family=binomial(link="logit"))
usa3 = glm(cd_character_n ~ cd_depression_numeric, data=df_usa, family=binomial(link="logit"))
stargazer(usa1, usa2, usa3, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

# Open Seat, Canada
can1 = glm(ab_prepared_n ~ ab_depression_numeric, data=df_can, family=binomial(link="logit"))
can2 = glm(ab_trust_n ~ ab_depression_numeric, data=df_can, family=binomial(link="logit"))
can3 = glm(ab_character_n ~ ab_depression_numeric, data=df_can, family=binomial(link="logit"))
stargazer(can1, can2, can3, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

# Incumbent, Canada
can1 = glm(cd_prepared_n ~ cd_depression_numeric, data=df_can, family=binomial(link="logit"))
can2 = glm(cd_trust_n ~ cd_depression_numeric, data=df_can, family=binomial(link="logit"))
can3 = glm(cd_character_n ~ cd_depression_numeric, data=df_can, family=binomial(link="logit"))
stargazer(can1, can2, can3, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

# Alternative Outcome Variables (Canada) - Same format as main text; Not used.

set.seed(42)
z1 = zelig(ab_character_n ~ ab_depression_numeric, 
           model = "logit", data = df_can,
           cite=FALSE)
x0 = setx(z1, ab_depression_numeric = 0)
x1 = setx(z1, ab_depression_numeric = 1)
sout1 = sim(z1, x = x0, x1 = x1)

z2 = zelig(ab_trust_n ~ ab_depression_numeric, 
           model = "logit", data = df_can,
           cite=FALSE)
x0 = setx(z2, ab_depression_numeric = 0)
x1 = setx(z2, ab_depression_numeric = 1)
sout2 = sim(z2, x = x0, x1 = x1)

z3 = zelig(ab_prepared_n ~ ab_depression_numeric, 
           model = "logit", data = df_can,
           cite=FALSE)
x0 = setx(z3, ab_depression_numeric = 0)
x1 = setx(z3, ab_depression_numeric = 1)
sout3 = sim(z3, x = x0, x1 = x1)

z4 = zelig(cd_character_n ~ cd_depression_numeric, 
           model = "logit", data = df_can,
           cite=FALSE)
x0 = setx(z4, cd_depression_numeric = 0)
x1 = setx(z4, cd_depression_numeric = 1)
sout4 = sim(z4, x = x0, x1 = x1)

z5 = zelig(cd_trust_n ~ cd_depression_numeric, 
           model = "logit", data = df_can,
           cite=FALSE)
x0 = setx(z5, cd_depression_numeric = 0)
x1 = setx(z5, cd_depression_numeric = 1)
sout5 = sim(z5, x = x0, x1 = x1)

z6 = zelig(cd_prepared_n ~ cd_depression_numeric, 
           model = "logit", data = df_can,
           cite=FALSE)
x0 = setx(z6, cd_depression_numeric = 0)
x1 = setx(z6, cd_depression_numeric = 1)
sout6 = sim(z6, x = x0, x1 = x1)

# Creating table
results = matrix(nrow=6,ncol=3)
index = 1
for (i in c(sout1,sout3,sout2,sout4,sout6,sout5)){
  results[index, 1] = mean(unlist(i[["sim.out"]][["x1"]][["fd"]]))
  results[index, 2] = quantile(unlist(i[["sim.out"]][["x1"]][["fd"]]),0.025)
  results[index, 3] = quantile(unlist(i[["sim.out"]][["x1"]][["fd"]]),0.975)
  index = index + 1
}

for (i in 1:nrow(results)){
  cat(round(results[i,1],3),"\n(",round(results[i,2],3),", ",round(results[i,3],3),")\n", sep="")
}

#===================================================================#
# Table A13. Interactions with Leave of Absence and Missed Votes
#===================================================================#

usa1 = glm(ab_vote_numeric ~ ab_depression_numeric*leave, data=df_usa, family=binomial(link="logit"))
usa2 = glm(cd_vote_numeric ~ cd_depression_numeric*missed, data=df_usa, family=binomial(link="logit"))
can1 = glm(ab_vote_numeric ~ ab_depression_numeric*leave, data=df_can, family=binomial(link="logit"))
can2 = glm(cd_vote_numeric ~ cd_depression_numeric*missed, data=df_can, family=binomial(link="logit"))

stargazer(usa1, usa2, can1, can2, 
          star.cutoffs = c(0.05,0.01,0.001), header=FALSE)

#===================================================================#
# Additional result mentioned in text: % of Missed Votes (USA)
#===================================================================#
# Additional significance test:
# Effect of depression, for subset of candidates having missed fewer 
# than 20% of votes (Incumbent Session, USA). 
usa = glm(cd_vote_numeric ~ cd_depression_numeric, data=df_usa[df_usa$missed<20,], 
          family=binomial(link="logit"))
summary(usa)

# Size of treatment effect:
set.seed(42)
usa = zelig(cd_vote_numeric ~ cd_depression_numeric, 
            model = "logit", data = df_usa[df_usa$missed<20,],
            cite=FALSE)
x0 = setx(usa, cd_depression_numeric = 0)
x1 = setx(usa, cd_depression_numeric = 1)
sout = sim(usa, x = x0, x1 = x1)
summary(sout)

# With a linear model:
usa_linear = lm(cd_vote_numeric ~ cd_depression_numeric, data=df_usa[df_usa$missed<20,])
summary(usa_linear)

#===================================================================#
# Figure A4. Predicted Probability by Leave of Absence (Canada)
#===================================================================#
# Open Seat session.
set.seed(42)

df_can$DepressionA = ifelse(df_can$illnessA=="Depression",1,0)
df_can$CancerA = ifelse(df_can$illnessA=="Cancer",1,0)

# Requires a coefficient constraint for identification.
z = zelig(ab_vote_numeric ~ DepressionA*leave + CancerA*leave - leave, 
          data=df_can,
          model="logit",
          cite=FALSE)
x0 = setx(z,CancerA=1,DepressionA=0,leave=c(1,2,6))
sout = sim(z, x = x0)
x1 = setx(z,CancerA=0,DepressionA=1,leave=c(1,2,6))
sout1 = sim(z, x = x1)

# Creating plot of predicted probabilities:
results = matrix(nrow=6,ncol=3)
for (i in 0:2){
  xstart = 1 + (i*2000)
  xend = 1000 + (i*2000)
  results[i+1, 1] = mean(unlist(sout[["sim.out"]][["range"]])[xstart:xend])
  results[i+1, 2] = quantile(unlist(sout[["sim.out"]][["range"]])[xstart:xend],0.025)
  results[i+1, 3] = quantile(unlist(sout[["sim.out"]][["range"]])[xstart:xend],0.975)
}
for (i in 0:2){
  xstart = 1 + (i*2000)
  xend = 1000 + (i*2000)
  results[i+4, 1] = mean(unlist(sout1[["sim.out"]][["range"]])[xstart:xend])
  results[i+4, 2] = quantile(unlist(sout1[["sim.out"]][["range"]])[xstart:xend],0.025)
  results[i+4, 3] = quantile(unlist(sout1[["sim.out"]][["range"]])[xstart:xend],0.975)
}
results=data.frame(results)
names(results) = c("prob","lb","ub")
cnames=c("Cancer: 1 Week Leave",
         "Cancer: 2 Week Leave",
         "Cancer: 6 Week Leave",
         "Depression: 1 Week Leave",
         "Depression: 2 Week Leave",
         "Depression: 6 Week Leave")
results$comparisons = cnames
results$comparisons = factor(results$comparisons,
                             levels=cnames,
                             ordered=TRUE)
ccolor = c(rep(185,6))

ggplot(results, aes(y = factor(comparisons, levels=rev(cnames)), x = prob,
                    xmin = lb, xmax = ub)) + theme_bw() +
  geom_pointrangeh(size=2, color=ccolor, fill = "white" ,shape=22) +
  xlim(c(0,1)) +
  geom_hline(yintercept = 3.5, lty=2) +
  theme(text = element_text(size=12),
        axis.text.y = element_text(size=14)) +
  xlab("P(Voting for Candidate A)") + ylab("")

#===================================================================#
# Figure A5. Predicted Probability by Missed Votes (Canada)
#===================================================================#

set.seed(42)
z = zelig(cd_vote_numeric ~ illnessC*missed, data=df_can,
          model="logit", cite=FALSE)
x0 = setx(z,illnessC=c("Flu","Skin Cancer","Depression"),missed=c(0,10,20,30))
sout = sim(z, x = x0)

# Creating plot of predicted probabilities:
results = matrix(nrow=12,ncol=3)
for (i in 0:11){
  xstart = 1 + (i*2000)
  xend = 1000 + (i*2000)
  results[i+1, 1] = mean(unlist(sout[["sim.out"]][["range"]])[xstart:xend])
  results[i+1, 2] = quantile(unlist(sout[["sim.out"]][["range"]])[xstart:xend],0.025)
  results[i+1, 3] = quantile(unlist(sout[["sim.out"]][["range"]])[xstart:xend],0.975)
}
results=data.frame(results)
names(results) = c("prob","lb","ub")
results$comparisons = c("Flu: 0% Missed",
                        "Skin Cancer: 0% Missed",
                        "Depression: 0% Missed",
                        "Flu: 10% Missed",
                        "Skin Cancer: 10% Missed",
                        "Depression: 10% Missed",
                        "Flu: 20% Missed",
                        "Skin Cancer: 20% Missed",
                        "Depression: 20% Missed",
                        "Flu: 30% Missed",
                        "Skin Cancer: 30% Missed",
                        "Depression: 30% Missed")
# Reordering categories for display:
cnames=c("Flu: 0% Missed",
         "Flu: 10% Missed",
         "Flu: 20% Missed",
         "Flu: 30% Missed",
         "Skin Cancer: 0% Missed",
         "Skin Cancer: 10% Missed",
         "Skin Cancer: 20% Missed",
         "Skin Cancer: 30% Missed",
         "Depression: 0% Missed",
         "Depression: 10% Missed",
         "Depression: 20% Missed",
         "Depression: 30% Missed")
results$comparisons = factor(results$comparisons,
                             levels=cnames,
                             ordered=TRUE)
ccolor = c(rep(185,12))

ggplot(results, aes(y = factor(comparisons, levels=rev(cnames)), x = prob,
                    xmin = lb, xmax = ub)) + theme_bw() +
  geom_pointrangeh(size=2, color=ccolor, fill = "white" ,shape=22) +
  xlim(c(0,1)) +
  geom_hline(yintercept = 4.5, lty=2) +
  geom_hline(yintercept = 8.5, lty=2) +
  theme(text = element_text(size=12),
        axis.text.y = element_text(size=14)) +
  xlab("P(Voting for Incumbent)") + ylab("")
